clear

// =================
// Create general empty file
// =================
// Builds the balanced skeleton `master_1d': one row for every combination of
//   master_id (city id) x class_1d (1-character CPC class) x f_myr (month).
// The patent counts are merged onto this skeleton further down so that
// city/class/months without any patent still appear in the final file.

// --- (1) List of 1-character CPC classes -----------------------------------
// Forward slashes are used so the path resolves on every platform and
// matches the other paths in this file.
import delimited "./input/CUSP/patents_cpc_categories_from_1900.csv", clear
gen class_1d = upper(substr(cpc_class,1,1))
replace class_1d = "0" if class_1d==""   // blank class -> catch-all "0"
keep class_1d
duplicates drop                           // one row per class
sort class_1d

// The catch-all class "0" must be in the list even when no CPC row is blank
// (patents absent from the CPC file are assigned "0" later on).  Append it
// only when it is not already there: a second "0" row would duplicate the
// whole grid and make the later 1:1 merges on the skeleton fail.
count if class_1d=="0"
if r(N)==0 {
	set obs `=_N+1'
	replace class_1d = "0" in L
}
tempfile cats
save "`cats'", replace

// --- (2) List of months -----------------------------------------------------
// Months are taken from the patent list and gaps are filled; the calendar
// therefore spans only the months between the earliest and latest month
// observed in the data, restricted to 1900m1-1929m12.
use "./output/intermediate/cities_patents_list", clear
gen n = 1 // placeholder so that collapse has something to sum
collapse (sum) n, by(f_m f_yr)
sort f_yr f_m
gen f_myr = ym(f_yr, f_m)
format f_myr %tm
drop if mi(f_myr)   // undefined months cannot be part of the calendar
tsset f_myr
tsfill, full        // insert months that have no patents at all
keep if tin(1900m1, 1929m12)
keep f_myr
tempfile dates
save "`dates'", replace

// --- (3) List of city ids ---------------------------------------------------
// 50 ids: 0-42 as generated, plus seven hard-coded ids >= 400.
// NOTE(review): presumably 0-42 are the NPI city ids and the seven larger
// values are CLS city ids (cls_id >= 400 is kept below) -- confirm.
clear
set obs 50
gen master_id = _n-1
replace master_id = 470 if master_id==43
replace master_id = 596 if master_id==44
replace master_id = 724 if master_id==45
replace master_id = 780 if master_id==46
replace master_id = 790 if master_id==47
replace master_id = 794 if master_id==48
replace master_id = 872 if master_id==49

// --- (4) Full grid: city x class x month -----------------------------------
cross using "`cats'"
cross using "`dates'"
compress

tempfile master_1d
save "`master_1d'", replace


// =================
// Patent Classes, CLS cities 
// =================
// Builds `cls_cities_cats': patent counts (all pat_* variables, summed) by
// month (f_myr) x 1-character CPC class (class_1d) x city (master_id = cls_id),
// for the cities with cls_id >= 400.

// One CPC record per patent: when a patent number appears several times the
// first row is kept, so that the m:1 merge below is valid.
// Forward slashes are used so the path resolves on every platform and
// matches the other paths in this file.
import delimited "./input/CUSP/patents_cpc_categories_from_1900.csv", clear
duplicates drop patnum, force

tempfile cpc
save "`cpc'", replace
clear

// Combine dates/inventors with CPC classes, then collapse by
// month-year-city-class

use "./output/intermediate/citiescls_patents_list"

merge m:1 patnum using "`cpc'"
/* _merge=1: international patents
   _merge=2: outside date range */
drop if _merge==2
drop _merge

// Keep only cities with a valid cls_id of 400 or more.  Stata treats missing
// as larger than any number, so missing ids have to be excluded explicitly;
// they could never match a city of the master grid anyway.
keep if cls_id>=400 & !mi(cls_id)

// Prep useful vars: first character of the CPC class, "0" when unclassified
gen class_1d = upper(substr(cpc_class,1,1))

replace class_1d = "0" if class_1d==""
codebook class_?d

// Collapse, etc.
collapse (sum) pat_*, by(f_m f_yr cls_id class_1d)
compress

sort f_yr f_m class_1d cls_id

gen f_myr = ym(f_yr, f_m)
format f_myr %tm

// Rows with an undefined month cannot match the master grid, and several of
// them could share the same (f_myr, class_1d, cls_id) key, which would break
// the 1:1 merge that uses this file.
drop if mi(f_myr)

keep f_myr class_1d cls_id pat_*
rename cls_id master_id

tempfile cls_cities_cats
save "`cls_cities_cats'", replace


// =================
// Patent Data, NPI cities
// =================
// Builds patent counts by month x 1-character CPC class x city for the NPI
// cities, places them on the balanced skeleton `master_1d', adds the CLS
// city counts from `cls_cities_cats', zero-fills empty cells and saves
// ./output/intermediate/citiesall_patents_longclass1d_19001929.

// One CPC record per patent: when a patent number appears several times the
// first row is kept, so that the m:1 merge below is valid.
// Forward slashes are used so the path resolves on every platform and
// matches the other paths in this file.
import delimited "./input/CUSP/patents_cpc_categories_from_1900.csv", clear
duplicates drop patnum, force

tempfile cpc
save "`cpc'", replace
clear

// Combine dates/inventors with CPC classes, then collapse by
// month-year-city-class

use "./output/intermediate/cities_patents_list"

merge m:1 patnum using "`cpc'"
/* _merge=1: international patents
   _merge=2: outside date range */
drop if _merge==2
drop _merge

*****************************
** DEFINE RELEVANT CLASSES **
* First character of the CPC class; "0" when the patent has no class
gen class_1d = upper(substr(cpc_class,1,1))

replace class_1d = "0" if class_1d==""
codebook class_?d
*****************************

collapse (sum) pat_*, by(f_m f_yr npi_id class_1d)
compress

sort f_yr f_m class_1d npi_id

gen f_myr = ym(f_yr, f_m)
format f_myr %tm

keep f_myr class_1d npi_id pat_*
rename npi_id master_id

// Rows without a month or a city id can never match the skeleton (they would
// be dropped as _merge==1 just below), and several of them could share the
// same key, which would make the 1:1 merge fail.
drop if mi(f_myr) | mi(master_id)

// Put the NPI counts on the balanced skeleton
merge 1:1 f_myr class_1d master_id using "`master_1d'"
drop if _merge==1 // These are out of date range
drop _merge

sort f_myr master_id class_1d

// Add the CLS city counts.  `update' is required: without it merge keeps the
// master's (missing) pat_* values for matched rows and the CLS counts are
// discarded.  With `update', missing master values are filled from the using
// file while non-missing master values are never overwritten.
merge 1:1 f_myr class_1d master_id using "`cls_cities_cats'", update
drop if _merge==2 // These are not the right cities
drop _merge

// Cells of the skeleton without any patent get an explicit zero
unab pvars : pat_*

foreach v of local pvars {
	replace `v' = 0 if mi(`v')
}

// Explicit range check on f_myr; unlike tin() it does not depend on the data
// in memory having been tsset.
keep if inrange(f_myr, tm(1900m1), tm(1929m12))

compress
save "./output/intermediate/citiesall_patents_longclass1d_19001929", replace

clear
